package com.yahoo.glimmer.query; /* * Copyright (c) 2012 Yahoo! Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software distributed under the License is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * See accompanying LICENSE file. */ import java.io.IOException; import java.io.InputStream; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.Map; import java.util.Set; import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLClassExpression; import org.semanticweb.owlapi.model.OWLDataProperty; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.OWLOntologyManager; import org.semanticweb.owlapi.model.OWLProperty; import com.yahoo.glimmer.query.RDFIndexStatistics.ClassStat; import com.yahoo.glimmer.util.Util; import com.yahoo.glimmer.vocabulary.OwlUtils; public class RDFIndexStatisticsBuilder { private Map<String, String> sortedPredicates = Collections.emptyMap(); private Map<String, Integer> typeTermDistribution = Collections.emptyMap(); private OWLOntology ontology; private Map<String, Integer> predicateTermDistribution = Collections.emptyMap(); public RDFIndexStatisticsBuilder setSortedPredicates(final Map<String, String> sortedPredicates) { this.sortedPredicates = sortedPredicates; return this; } public RDFIndexStatisticsBuilder setTypeTermDistribution(final Map<String, Integer> typeTermDistribution) { this.typeTermDistribution = typeTermDistribution; return this; } public RDFIndexStatisticsBuilder setOwlOntologyInputStream(InputStream owlOntologgyInputStream) throws IOException { OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); try { ontology = manager.loadOntologyFromOntologyDocument(owlOntologgyInputStream); } catch (OWLOntologyCreationException e) { throw new IllegalArgumentException("Ontology failed to load:" + e.getMessage()); } owlOntologgyInputStream.close(); return this; } public RDFIndexStatisticsBuilder setPredicateTermDistribution(Map<String, Integer> predicateTermDistribution) { this.predicateTermDistribution = new HashMap<String, Integer>(); for (String key : predicateTermDistribution.keySet()) { Integer count = predicateTermDistribution.get(key); if (key != null && count != null) { this.predicateTermDistribution.put(Util.removeVersion(key), count); } } return this; } public RDFIndexStatistics build() { RDFIndexStatistics stats = new RDFIndexStatistics(); stats.setFields(sortedPredicates); // Capture basic statistics about class frequency for (String clazzName : typeTermDistribution.keySet()) { Integer count = typeTermDistribution.get(clazzName); String localName = OwlUtils.getLocalName(IRI.create(clazzName)); stats.addClassStat(Util.removeVersion(clazzName), new ClassStat(localName, count)); } if (ontology != null && stats.getClasses() != null) { Map<String, OWLClass> nameToOwlClassMap = new HashMap<String, OWLClass>(); // Populate owlToStatClassMap and set labels and properties on // ClassStat instances. for (String clazzName : stats.getClasses().keySet()) { OWLClass owlClass = null; // Remove version if the class name contains a version // number if (ontology.containsClassInSignature(IRI.create(clazzName))) { owlClass = ontology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create(clazzName)); } else { owlClass = ontology.getOWLOntologyManager().getOWLDataFactory().getOWLClass(IRI.create(Util.removeVersion(clazzName))); } if (owlClass != null) { ClassStat stat = stats.getClasses().get(clazzName); stat.setLabel(OwlUtils.getLabel(owlClass, ontology)); for (OWLProperty<?, ?> prop : OwlUtils.getPropertiesInDomain(owlClass, ontology)) { if (prop instanceof OWLDataProperty) { String name = prop.getIRI().toString(); stat.addProperty(name); String encodeName = com.yahoo.glimmer.util.Util.encodeFieldName(Util.removeVersion(name)); Integer predicateCount = predicateTermDistribution.get(encodeName); if (predicateCount != null) { stats.addPropertyStat(name, predicateCount); } } } nameToOwlClassMap.put(owlClass.getIRI().toString(), owlClass); } else { System.err.println("Indexed type not in the ontology: " + clazzName); } } // Build tree and get root classes. Set<String> rootClassNames = new HashSet<String>(); HashSet<String> classNamesInIndex = new HashSet<String>(stats.getClasses().keySet()); for (String className : classNamesInIndex) { buildGraph(stats, nameToOwlClassMap, rootClassNames, className); } for (String rootClassName : rootClassNames) { stats.addRootClass(rootClassName); } // Propagate properties from ancestors to decendents. LinkedList<String> fifo = new LinkedList<String>(stats.getRootClasses()); while (!fifo.isEmpty()) { String className = fifo.remove(); ClassStat classStat = stats.getClasses().get(className); // Add classStat's properties to it direct children and then queue them. if (classStat.getChildren() != null) { for (String childClassName : classStat.getChildren()) { ClassStat childClassStat = stats.getClasses().get(childClassName); childClassStat.addProperties(classStat.getProperties()); fifo.add(childClassName); } } } } return stats; } /** * For the given OWLClass traverse the Ontology graph to it's roots creating * intermediate ClassStat objects for missing super classes and adding * ClassStat child names. Roots here refers to super classes that aren't sub * classes of any other class. * * @param ontology2 * * @param onto * @param stats * @param rootClassNames * @param owlToStatClassMap * @param rootClasses * @param owlClass * * TODO cyclic detection. */ private void buildGraph(RDFIndexStatistics stats, Map<String, OWLClass> nameToOwlClassMap, Set<String> rootClassNames, String owlClassName) { int superClassCount = 0; OWLClass owlClass = nameToOwlClassMap.get(owlClassName); for (OWLClassExpression superOwlExpression : owlClass.getSuperClasses(ontology)) { if (superOwlExpression instanceof OWLClass) { OWLClass superOwlClass = (OWLClass) superOwlExpression; String superOwlClassName = superOwlClass.getIRI().toString(); ClassStat superStat = stats.getClasses().get(superOwlClassName); if (superStat == null) { // Is is possible the the super class doesn't have a // ClassStat object as we start with only // ClassStat objects for things that are indexed. String superLocalName = OwlUtils.getLocalName(superOwlClass.getIRI()); superStat = new ClassStat(superLocalName,0); superStat.setLabel(OwlUtils.getLabel(superOwlClass, ontology)); for (OWLProperty<?, ?> prop : OwlUtils.getPropertiesInDomain(superOwlClass, ontology)) { if (prop instanceof OWLDataProperty) { String name = prop.getIRI().toString(); superStat.addProperty(name); } } stats.addClassStat(superOwlClassName, superStat); nameToOwlClassMap.put(superOwlClassName, superOwlClass); } // Add this owlClass as a child of the superOwlClass superStat.addChild(owlClass.getIRI().toString()); buildGraph(stats, nameToOwlClassMap, rootClassNames, superOwlClassName); superClassCount++; } } if (superClassCount == 0) { rootClassNames.add(owlClassName); } } public static String toString(RDFIndexStatistics stats) { StringBuilder sb = new StringBuilder(); for (String rootClassName : stats.getRootClasses()) { print(0, stats, rootClassName, sb); } return sb.toString(); } private static void print(int depth, RDFIndexStatistics stats, String owlClassName, StringBuilder sb) { ClassStat stat = stats.getClasses().get(owlClassName); for (int i = 0; i < depth; i++) { sb.append('\t'); } sb.append(owlClassName); sb.append(':'); sb.append(stat.getCount()); sb.append('\n'); if (stat.getChildren() != null) { for (String childClassName : stat.getChildren()) { print(depth + 1, stats, childClassName, sb); } } } }